# Load packages
import episcanpy as epi
import scanpy as sc
import anndata as ad
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import scmoib
import sklearn
# Global scanpy plotting defaults, gathered in one mapping and applied in a single call.
_figure_options = dict(
    scanpy=True,
    dpi=80,
    dpi_save=250,
    frameon=True,
    vector_friendly=True,
    color_map="YlGnBu",
    format='pdf',
    transparent=False,
    ipython_format='png2x',
)
sc.set_figure_params(**_figure_options)
# Directory the integrated .h5ad / .h5Seurat files are read from.
DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/liger"
# IPython line magic (not plain Python): list the directory contents.
%ls $DATADIR
PBMC_10X_jNMF.h5Seurat rplot_share_seq_brain.pdf share_seq_brain.h5ad PBMC_10X_jNMF.h5ad rplot_snare_seq_brain.pdf snare_seq_brain.h5Seurat rplot_PBMC_10X.pdf share_seq_brain.h5Seurat snare_seq_brain.h5ad
# Load the PBMC 10X object from DATADIR.
# `ad.read` is a deprecated alias of `ad.read_h5ad`; the explicit reader does
# the same thing and avoids the deprecation warning on newer anndata releases.
pbmc = ad.read_h5ad(os.path.join(DATADIR, 'PBMC_10X_jNMF.h5ad'))
pbmc
/home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/anndata/compat/__init__.py:183: FutureWarning: Moving element from .uns['neighbors']['distances'] to .obsp['distances']. This is where adjacency matrices should go now.
AnnData object with n_obs × n_vars = 21984 × 2000
obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'X', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'log_n_genes', 'filter_barcodes', 'broad.cell.types', 'louvain2_rna', 'nb_features', 'log_nb_features', 'louvain_lsi', 'n_counts', 'louvain_pca', 'louvain', 'filter', 'batch', 'omic_batch', 'group', 'clusters', 'RNA_snn_res.0.3', 'seurat_clusters'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
uns: 'neighbors'
obsm: 'X_iNMF', 'X_iNMF_raw', 'X_pca', 'X_umap'
varm: 'INMF_RAW', 'PCs'
obsp: 'distances'
# Rebuild the neighbour graph on the 'X_iNMF' representation stored in .obsm.
sc.pp.neighbors(pbmc, use_rep='X_iNMF')

# Split the obs index into its first and second half.
# NOTE(review): the metric calls below receive these halves as the two barcode
# lists, presumably matched by position — confirm against the data layout.
bc_list = list(pbmc.obs.index)
_midpoint = len(bc_list) // 2
bc_list1, bc_list2 = bc_list[:_midpoint], bc_list[_midpoint:]

# Calculator object; its collected values are read back with get_df() later on.
my_obj = scmoib.tl.MetricsCalculator()
# Distance between matching barcodes, broken down by 'broad.cell.types',
# printed from the functional API.
_dist_per_type = scmoib.tl.metrics.average_distance_between_matching_barcodes(
    pbmc, bc_list1, bc_list2, cell_type='broad.cell.types', absolute=True
)
print(_dist_per_type)

# Same metric through the calculator, called with the 'pbmc_liger' label.
my_obj._average_dist_between_matching_barcodes(
    pbmc, 'pbmc_liger', bc_list1, bc_list2, cell_type='broad.cell.types'
)

# Show the AnnData summary to see which fields were added.
pbmc
{'NK': 49.978601875840546, 'DC': 67.38911952860646, 'CD14 Monocytes': 70.56895803613638, 'plasma': 54.752941003628905, 'Megakaryocytes': 57.073308298874316, 'FCGR3A Monocytes': 68.18874407882622, 'B': 51.997281294672455, 'CD8 T': 45.182756862169505, 'Memory CD4 T': 42.1801996271665, 'Naive CD4 T': 42.863625286557244}
AnnData object with n_obs × n_vars = 21984 × 2000
obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'X', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'log_n_genes', 'filter_barcodes', 'broad.cell.types', 'louvain2_rna', 'nb_features', 'log_nb_features', 'louvain_lsi', 'n_counts', 'louvain_pca', 'louvain', 'filter', 'batch', 'omic_batch', 'group', 'clusters', 'RNA_snn_res.0.3', 'seurat_clusters', 'euclidean_pairwise_distance_between_matching_barcodes'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
uns: 'neighbors', 'average_dist_per_cluster'
obsm: 'X_iNMF', 'X_iNMF_raw', 'X_pca', 'X_umap'
varm: 'INMF_RAW', 'PCs'
obsp: 'distances', 'connectivities'
# Single overall distance between matching barcodes (no cell-type grouping).
_mean_dist = scmoib.tl.metrics.average_distance_between_matching_barcodes(
    pbmc, bc_list1, bc_list2
)
print(_mean_dist)
my_obj._average_dist_between_matching_barcodes(
    pbmc, 'pbmc_liger', bc_list1, bc_list2
)
my_obj.get_df()
54.84437225373786
| pairwise_distance | |
|---|---|
| pbmc_liger | 54.844372 |
# Paired-omics accuracy using 'omic_batch' and 'seurat_clusters'.
_accuracy = scmoib.tl.metrics.accuracy_paired_omics(
    pbmc, bc_list1, bc_list2, 'omic_batch', 'seurat_clusters'
)
print(_accuracy)
my_obj._accuracy_paired_omics(
    pbmc, 'pbmc_liger', bc_list1, bc_list2, 'omic_batch', 'seurat_clusters'
)
my_obj.get_df()
0.2013282387190684
| accuracy | pairwise_distance | |
|---|---|---|
| pbmc_liger | 0.201328 | 54.844372 |
# Paired-omics accuracy per 'broad.cell.types' group. Both calls share the
# same trailing positional arguments, so they are built once.
_paired_args = (bc_list1, bc_list2, 'omic_batch', 'seurat_clusters', 'broad.cell.types')
print(scmoib.tl.metrics.accuracy_paired_omics_per_cell_type(pbmc, *_paired_args))
my_obj._accuracy_paired_omics_per_cell_type(pbmc, 'pbmc_liger', *_paired_args)
# Print the per-cell-type result read back from .uns.
print(pbmc.uns['acc_cell_type'])
{'B': 0.27438468550592526, 'CD14 Monocytes': 0.2530429212043562, 'CD8 T': 0.09559748427672957, 'DC': 0.06785714285714285, 'FCGR3A Monocytes': 0.2545045045045045, 'Megakaryocytes': 0.2487360970677452, 'Memory CD4 T': 0.12619926199261994, 'NK': 0.01366120218579235, 'Naive CD4 T': 0.2755632582322357, 'plasma': 0.08571428571428572}
{'B': 0.27438468550592526, 'CD14 Monocytes': 0.2530429212043562, 'CD8 T': 0.09559748427672957, 'DC': 0.06785714285714285, 'FCGR3A Monocytes': 0.2545045045045045, 'Megakaryocytes': 0.2487360970677452, 'Memory CD4 T': 0.12619926199261994, 'NK': 0.01366120218579235, 'Naive CD4 T': 0.2755632582322357, 'plasma': 0.08571428571428572}
# Graph connectivity with cells grouped by 'omic_batch'.
_batch_key = 'omic_batch'
print(scmoib.tl.metrics.graph_connectivity(pbmc, _batch_key))
my_obj._graph_connectivity(pbmc, 'pbmc_liger', _batch_key)
my_obj.get_df()
0.9940866084425036
| accuracy | graph connectivity omic_batch | pairwise_distance | |
|---|---|---|---|
| pbmc_liger | 0.201328 | 0.994087 | 54.844372 |
# Graph connectivity with cells grouped by 'broad.cell.types'.
_label_key = 'broad.cell.types'
print(scmoib.tl.metrics.graph_connectivity(pbmc, _label_key))
my_obj._graph_connectivity(pbmc, 'pbmc_liger', _label_key)
my_obj.get_df()
0.6279126635147454
| accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | pairwise_distance | |
|---|---|---|---|---|
| pbmc_liger | 0.201328 | 0.627913 | 0.994087 | 54.844372 |
# AMI between the 'broad.cell.types' and 'seurat_clusters' columns.
_cluster_keys = ('broad.cell.types', 'seurat_clusters')
print(scmoib.tl.metrics.ami(pbmc, *_cluster_keys))
my_obj.ami(pbmc, 'pbmc_liger', *_cluster_keys)
my_obj.get_df()
0.5048331010496683
| AMI | accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | pairwise_distance | |
|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.201328 | 0.627913 | 0.994087 | 54.844372 |
# ARI between the 'broad.cell.types' and 'seurat_clusters' columns.
_cluster_keys = ('broad.cell.types', 'seurat_clusters')
print(scmoib.tl.metrics.ari(pbmc, *_cluster_keys))
my_obj.ari(pbmc, 'pbmc_liger', *_cluster_keys)
my_obj.get_df()
0.35482446371486587
| AMI | ARI | accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | pairwise_distance | |
|---|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.354824 | 0.201328 | 0.627913 | 0.994087 | 54.844372 |
# NMI between the 'broad.cell.types' and 'seurat_clusters' columns.
_cluster_keys = ('broad.cell.types', 'seurat_clusters')
print(scmoib.tl.metrics.nmi(pbmc, *_cluster_keys))
my_obj.nmi(pbmc, 'pbmc_liger', *_cluster_keys)
my_obj.get_df()
0.5057174898195305
| AMI | ARI | NMI | accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | pairwise_distance | |
|---|---|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.354824 | 0.505717 | 0.201328 | 0.627913 | 0.994087 | 54.844372 |
# Homogeneity between the 'broad.cell.types' and 'seurat_clusters' columns.
_cluster_keys = ('broad.cell.types', 'seurat_clusters')
print(scmoib.tl.metrics.homogeneity(pbmc, *_cluster_keys))
my_obj.homogeneity(pbmc, 'pbmc_liger', *_cluster_keys)
my_obj.get_df()
0.6103135222570035
| AMI | ARI | NMI | accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | homogeneity | pairwise_distance | |
|---|---|---|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.354824 | 0.505717 | 0.201328 | 0.627913 | 0.994087 | 0.610314 | 54.844372 |
# Silhouette scores: printed from the functional API, then stored through the
# calculator under the 'pbmc_liger' label.
# NOTE(review): only the calculator call passes embed='X_pca'; the printed call
# relies on the function's default embedding — confirm both use the same one,
# and whether 'X_pca' (rather than 'X_iNMF') is the intended embedding here.
# NOTE(review): the recorded run returns NaN for the last two scores together
# with "Mean of empty slice" warnings — worth checking the 'omic_batch' labels.
print(scmoib.tl.metrics.silhouette(pbmc, batch_key='omic_batch', cell_label='broad.cell.types'))
my_obj.silhouette(pbmc, 'pbmc_liger', batch_key='omic_batch',
cell_label='broad.cell.types', embed='X_pca')
my_obj.get_df()
/home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/fromnumeric.py:3373: RuntimeWarning: Mean of empty slice. /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/_methods.py:170: RuntimeWarning: invalid value encountered in double_scalars /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/fromnumeric.py:3373: RuntimeWarning: Mean of empty slice. /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/_methods.py:170: RuntimeWarning: invalid value encountered in double_scalars
(0.5120715322309394, 0.4665468545502218, nan, nan)
/home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/fromnumeric.py:3373: RuntimeWarning: Mean of empty slice. /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/_methods.py:170: RuntimeWarning: invalid value encountered in double_scalars /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/fromnumeric.py:3373: RuntimeWarning: Mean of empty slice. /home/icb/atai.dobrynin/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/numpy/core/_methods.py:170: RuntimeWarning: invalid value encountered in double_scalars
| AMI | ARI | NMI | accuracy | graph connectivity broad.cell.types | graph connectivity omic_batch | homogeneity | il_score_clus | il_score_sil | pairwise_distance | sil_clus | sil_global | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.354824 | 0.505717 | 0.201328 | 0.627913 | 0.994087 | 0.610314 | NaN | NaN | 54.844372 | 0.466547 | 0.512072 |
%%time
# Node metrics for the two barcode lists with 'broad.cell.types' as the
# grouping column; the results appear under pbmc.uns['node_metrics'].
# NOTE(review): the trailing 8 is presumably a worker/CPU count — confirm
# against the scmoib signature.
my_obj.node_metrics(pbmc, 'pbmc_liger', bc_list1, bc_list2, 'broad.cell.types', 8)
CPU times: user 2.07 s, sys: 1.85 s, total: 3.92 s Wall time: 6min 10s
# Display the table of metrics collected so far (one row per dataset label).
my_obj.get_df()
| AMI | ARI | NMI | accuracy | disc_ratio | graph connectivity broad.cell.types | graph connectivity omic_batch | homogeneity | il_score_clus | il_score_sil | mean_nodes | num_inf | pairwise_distance | sil_clus | sil_global | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| pbmc_liger | 0.504833 | 0.354824 | 0.505717 | 0.201328 | 0.0 | 0.627913 | 0.994087 | 0.610314 | NaN | NaN | 14.711608 | 0.0 | 54.844372 | 0.466547 | 0.512072 |
# Display the AnnData summary to see which .obs/.uns fields now exist.
pbmc
AnnData object with n_obs × n_vars = 21984 × 2000
obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'X', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'log_n_genes', 'filter_barcodes', 'broad.cell.types', 'louvain2_rna', 'nb_features', 'log_nb_features', 'louvain_lsi', 'n_counts', 'louvain_pca', 'louvain', 'filter', 'batch', 'omic_batch', 'group', 'clusters', 'RNA_snn_res.0.3', 'seurat_clusters', 'euclidean_pairwise_distance_between_matching_barcodes'
var: 'vst.mean', 'vst.variance', 'vst.variance.expected', 'vst.variance.standardized', 'vst.variable'
uns: 'neighbors', 'average_dist_per_cluster', 'acc_cell_type', 'node_metrics'
obsm: 'X_iNMF', 'X_iNMF_raw', 'X_pca', 'X_umap'
varm: 'INMF_RAW', 'PCs'
obsp: 'distances', 'connectivities'
# List the entries stored under .uns['node_metrics'].
pbmc.uns['node_metrics'].keys()
dict_keys(['num_inf', 'mean_nodes', 'dists', 'nodes_count', 'mean_nodes_per_cell_type', 'disc_ratio'])
# UMAP plot coloured by 'omic_batch', given both barcode lists.
# NOTE(review): edges_width presumably controls the lines drawn between
# matching barcodes — confirm against the scmoib plotting docs.
scmoib.pl.umap_barcodes(pbmc, bc_list1, bc_list2, color='omic_batch', edges_width=0.01)
... storing 'orig.ident' as categorical ... storing 'filter_barcodes' as categorical ... storing 'louvain2_rna' as categorical ... storing 'filter' as categorical ... storing 'group' as categorical
# Synthetic comparison dataset: an empty AnnData that carries only random
# 'nodes_count' values, so the grouped cumulative plot has a second curve.
# The sample size follows the barcode half (len(bc_list1), which is 10992 for
# this dataset) instead of a hard-coded literal, and a fixed seed keeps the
# "TEST" curve identical across notebook runs.
# NOTE(review): assumes 'nodes_count' holds one value per barcode pair — confirm.
_rng = np.random.RandomState(0)
adata1 = ad.AnnData()
adata1.uns['node_metrics'] = {
    'nodes_count': list(_rng.randint(0, 43, len(bc_list1))),
}
scmoib.pl.cumulative_node_group([pbmc, adata1], legend=('PBMC_LIGER', 'TEST'))
scmoib.pl.cumulative_node(pbmc, 'PBMC liger')
# Add a dummy 'test' entry next to 'pbmc_liger' so cd_ratio gets a second row.
my_obj.metrics['test'] = {'disc_ratio': 0.25}
print(my_obj.metrics)

df = my_obj.get_df()
scmoib.pl.cd_ratio(df)
{'pbmc_liger': {'pairwise_distance': 54.84437225373786, 'accuracy': 0.2013282387190684, 'graph connectivity omic_batch': 0.9940866084425036, 'graph connectivity broad.cell.types': 0.6279126635147454, 'AMI': 0.5048331010496683, 'ARI': 0.35482446371486587, 'NMI': 0.5057174898195305, 'homogeneity': 0.6103135222570035, 'sil_global': 0.5120715322309394, 'sil_clus': 0.4665468545502218, 'il_score_clus': nan, 'il_score_sil': nan, 'num_inf': 0, 'mean_nodes': 14.711608442503639, 'disc_ratio': 0.0}, 'test': {'disc_ratio': 0.25}}
# Plots built from the node metrics stored on pbmc.
scmoib.pl.node_distr(pbmc, title='PBMC liger')
scmoib.pl.mean_node_per_cell_type(pbmc, "PBMC LIGER")
scmoib.pl.dists_distr(pbmc)
# River plot from 'broad.cell.types' (source) to 'seurat_clusters' (target).
scmoib.pl.river_plot(pbmc, source='broad.cell.types', target='seurat_clusters')
# Per-cell-type accuracy, using the values stored in .uns['acc_cell_type'].
scmoib.pl.accuracy_per_cell_type(pbmc, pbmc.uns['acc_cell_type'], 'broad.cell.types')
# Heatmap of all collected metrics; NaN entries are replaced by 0 before plotting.
scmoib.pl.metric_heatmap(my_obj.get_df().fillna(0), scale=True)
# Euclidean pairwise distance per cell type.
scmoib.pl.pairwise_distance(pbmc, metric='euclidean', cell_type='broad.cell.types')
... storing 'orig.ident' as categorical ... storing 'filter_barcodes' as categorical ... storing 'louvain2_rna' as categorical ... storing 'filter' as categorical ... storing 'group' as categorical
# The plotting helper raises KeyError unless the column
# '<metric>_pairwise_distance_between_matching_barcodes' already exists in
# pbmc.obs. Only the euclidean column is present in this notebook, so guard the
# cosine plot instead of letting the cell abort with a traceback.
_cosine_key = 'cosine_pairwise_distance_between_matching_barcodes'
if _cosine_key in pbmc.obs:
    scmoib.pl.pairwise_distance(pbmc, metric='cosine', cell_type='broad.cell.types')
else:
    print(f"Skipping cosine violin plot: '{_cosine_key}' is not in pbmc.obs; "
          "compute the cosine pairwise distance first.")
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-20-d742ab17341e> in <module> ----> 1 scmoib.pl.pairwise_distance(pbmc, metric='cosine', cell_type='broad.cell.types') ~/miniconda3/envs/scanpy_env/lib/python3.6/site-packages/scmoib-0.2.1-py3.6.egg/scmoib/plotting/_pairwise_distance.py in pairwise_distance(adata, metric, cell_type, title, rotation) 13 key = f'{metric}_pairwise_distance_between_matching_barcodes' 14 if key not in adata.obs.keys(): ---> 15 raise KeyError(f'Please compute the {metric} metric for your dataset') 16 17 sc.pl.violin(adata, keys=key, groupby=cell_type, title=title, rotation=rotation) KeyError: 'Please compute the cosine metric for your dataset'